<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="zh-CN">
  <head>

    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <meta content="Cask Data, Inc." name="author" />
<meta content="Copyright © 2017 Cask Data, Inc." name="copyright" />
<meta content="The CDAP User Guide: Getting Started" name="description" />


    <meta name="git_release" content="6.1.1">
    <meta name="git_hash" content="05fbac36f9f7aadeb44f5728cea35136dbc243e5">
    <meta name="git_timestamp" content="2020-02-09 08:22:47 +0800">
    <title>示例: 分析和物联网设备数据脱敏</title>

    <link rel="stylesheet" href="../_static/cdap-bootstrap.css" type="text/css" />
    <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
    <link rel="stylesheet" href="../_static/bootstrap-3.3.6/css/bootstrap.min.css" type="text/css" />
    <link rel="stylesheet" href="../_static/bootstrap-3.3.6/css/bootstrap-theme.min.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/bootstrap-sphinx.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/cdap-dynamicscrollspy-4.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/jquery.mCustomScrollbar.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/cdap-jquery.mCustomScrollbar.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/abixTreeList-2.css" type="text/css" />
    <link rel="stylesheet" href="../_static/cdap-bootstrap.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/cdap-hide-toc.css" type="text/css" />

    <script type="text/javascript">
      var DOCUMENTATION_OPTIONS = {
        URL_ROOT:    '',
        VERSION:     '6.1.1',
        COLLAPSE_INDEX: false,
        FILE_SUFFIX: '.html',
        HAS_SOURCE:  false
      };
    </script>
    <script type="text/javascript" src="../_static/jquery.js"></script>
    <script type="text/javascript" src="../_static/underscore.js"></script>
    <script type="text/javascript" src="../_static/doctools.js"></script>
    <script type="text/javascript" src="../_static/language_data.js"></script>

    <link rel="shortcut icon" href="../_static/favicon.ico"/>
    <link rel="index" title="Index" href="../genindex.html" />
    <link rel="search" title="Search" href="../search.html" />
    <link rel="top" title="Cask Data Application Platform 6.1.1 Documentation" href="../index.html" />
    <link rel="up" title="CDAP 入门指南" href="index.html" />
    <link rel="next" title="数据预处理文档" href="../data-preparation/index.html" />
    <link rel="prev" title="Example: Building a Stock Selection Pipeline" href="stocks.html" />
    <!-- block extrahead -->
    <meta charset='utf-8'>
    <meta http-equiv='X-UA-Compatible' content='IE=edge,chrome=1'>
    <meta name='viewport' content='width=device-width, initial-scale=1.0'>
    <meta name="apple-mobile-web-app-capable" content="yes">
    <!-- block extrahead end -->

</head>
<body role="document">

<!-- block navbar -->
<div id="navbar" class="navbar navbar-inverse navbar-default navbar-fixed-top">
    <div class="container-fluid">
      <div class="row">
        <div class="navbar-header">
          <!-- .btn-navbar is used as the toggle for collapsed navbar content -->
          <a class="navbar-brand" href="../table-of-contents/../../index.html">
            <span><img alt="CDAP logo" src="../_static/cdap_logo.svg"/></span>
          </a>

          <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".nav-collapse">
            <span class="icon-bar"></span>
            <span class="icon-bar"></span>
            <span class="icon-bar"></span>
          </button>

          <div class="pull-right">
            <div class="dropdown version-dropdown">
              <a href="#" class="dropdown-toggle" data-toggle="dropdown"
                role="button" aria-haspopup="true" aria-expanded="false">
                v 6.1.1 <span class="caret"></span>
              </a>
              <ul class="dropdown-menu">
                <li><a href="//docs.cdap.io/cdap/5.1.2/en/index.html">v 5.1.2</a></li>
                <li><a href="//docs.cdap.io/cdap/4.3.4/en/index.html">v 4.3.4</a></li>
              </ul>
            </div>
          </div>
          <form class="navbar-form navbar-right navbar-search" action="../search.html" method="get">
            <div class="form-group">
              <div class="navbar-search-image material-icons"></div>
              <input type="text" name="q" class="form-control" placeholder="  Search" />
            </div>
            <input type="hidden" name="check_keywords" value="yes" />
            <input type="hidden" name="area" value="default" />
          </form>

          <div class="collapse navbar-collapse nav-collapse navbar-right navbar-navigation">
            <ul class="nav navbar-nav"><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../index.html">简介</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link current" href="../table-of-contents/../../guides.html">手册</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../reference-manual/index.html">参考</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../faqs/index.html">帮助</a></li>
            </ul>
          </div>

        </div>
      </div>
    </div>
  </div><!-- block navbar end -->
<!-- block main content -->
<div class="main-container container">
  <div class="row"><div class="col-md-2">
      <div id="sidebar" class="bs-sidenav scrollable-y-outside" role="complementary">
<!-- theme_manual: user-guide -->
<!-- theme_manual_highlight: guides -->
<!-- sidebar_title_link: ../table-of-contents/../../guides.html -->

  <div role="note" aria-label="manuals links"><h3><a href="../table-of-contents/../../guides.html">Guides</a></h3>

    <ul class="this-page-menu">
      <li class="toctree-l1"><b><a href="../table-of-contents/../../user-guide/index.html" rel="nofollow">用户手册</a></b>
      <nav class="pagenav">
      <ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../index.html"> 简介</a></li>
<li class="toctree-l1"><a class="reference internal" href="../overview.html"> 概述</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="index.html"> 入门指南</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="campaign.html">MySQL 客户数据</a></li>
<li class="toctree-l2"><a class="reference internal" href="nytimes-xml.html">纽约时报 XML 数据推送</a></li>
<li class="toctree-l2"><a class="reference internal" href="stocks.html">股票选择</a></li>
<li class="toctree-l2 current"><a class="current reference internal" href="#">物联网 IoT 设备数据</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../data-preparation/index.html"> 数据预处理</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/concepts.html">      概念</a></li>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/directives/index.html">      数据处理指令</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/catalog-lookup.html">catalog-lookup</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/change-column-case.html">change-column-case</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/changing-case.html">changing-case</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/cleanse-column-names.html">cleanse-column-names</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/columns-replace.html">columns-replace</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/copy.html">copy</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/cut-character.html">cut-character</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/decode.html">decode</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/diff-date.html">diff-date</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/drop.html">drop</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/encode.html">encode</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/extract-regex-groups.html">extract-regex-groups</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/fail.html">fail</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/fill-null-or-empty.html">fill-null-or-empty</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/filter-row-if-matched.html">filter-row-if-matched</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/filter-row-if-true.html">filter-row-if-true</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/filter-rows-on.html">filter-rows-on</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/find-and-replace.html">find-and-replace</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/flatten.html">flatten</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/format-date.html">format-date</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/format-unix-timestamp.html">format-unix-timestamp</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/generate-uuid.html">generate-uuid</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/hash.html">hash</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/increment-variable.html">increment-variable</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/index-split.html">index-split</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/invoke-http.html">invoke-http</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/json-path.html">json-path</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/keep.html">keep</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/mask-number.html">mask-number</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/mask-shuffle.html">mask-shuffle</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/merge.html">merge</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-avro-file.html">parse-as-avro-file</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-avro.html">parse-as-avro</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-csv.html">parse-as-csv</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-date.html">parse-as-date</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-excel.html">parse-as-excel</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-fixed-length.html">parse-as-fixed-length</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-hl7.html">parse-as-hl7</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-json.html">parse-as-json</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-log.html">parse-as-log</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-simple-date.html">parse-as-simple-date</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-xml.html">parse-as-xml</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-timestamp.html">parse-timestamp</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-xml-to-json.html">parse-xml-to-json</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/quantize.html">quantize</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/rename.html">rename</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/send-to-error.html">send-to-error</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-charset.html">set-charset</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-column.html">set-column</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-columns.html">set-columns</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-record-delim.html">set-record-delim</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-type.html">set-type</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-variable.html">set-variable</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/split-by-separator.html">split-by-separator</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/split-email.html">split-email</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/split-to-columns.html">split-to-columns</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/split-to-rows.html">split-to-rows</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/split-url.html">split-url</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/stemming.html">stemming</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/swap.html">swap</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/table-lookup.html">table-lookup</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/text-distance.html">text-distance</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/text-metric.html">text-metric</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/trim.html">trim</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/url-decode.html">url-decode</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/url-encode.html">url-encode</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/write-as-csv.html">write-as-csv</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/write-as-json-map.html">write-as-json-map</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/write-as-json-object.html">write-as-json-object</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/xpath.html">xpath</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/functions/index.html">      函数</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/json-functions.html">JSON 函数</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/type-functions.html">类型函数</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/geofence-functions.html">地理围栏函数</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/dq-functions.html">数据质量函数</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/date-functions.html">日期函数</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/ddl-functions.html">DDL 函数</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/service/index.html">      服务</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/admin.html">行政和管理服务</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/connection-properties.html">连接属性</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/connections.html">连接服务</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/execution.html">数据处理指令执行</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/request.html">请求格式规范</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/schema-registry.html">Schema 注册库</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/services.html">数据预处理服务</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/performance.html">性能</a></li>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/exclusion-and-aliasing.html">排除与别名</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../pipelines/index.html"> 数据流管道</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/concepts-design.html"> 概念与设计</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/getting-started.html"> 入门指南</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/studio.html"> CDAP 数据流设计器</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/creating-pipelines.html"> 创建数据流管道</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/running-pipelines.html"> 运行数据流管道</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/plugin-management.html"> 插件管理</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/plugins/index.html"> 插件参考</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/actions/index.html"> Action Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/sources/index.html"> Source Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/transforms/index.html"> Transform Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/analytics/index.html"> Analytic Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/sinks/index.html"> Sink Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/shared-plugins/index.html"> Shared Plugins</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../pipelines/plugins/shared-plugins/core.html">CoreValidator</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/post-run-plugins/index.html"> Post-run Plugins</a><ul class="simple">
</ul>
</li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../mmds/index.html"> 数据分析</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../mmds/concepts.html"> Concepts</a></li>
<li class="toctree-l2"><a class="reference internal" href="../mmds/feature-gen.html"> Feature Generation</a></li>
<li class="toctree-l2"><a class="reference internal" href="../mmds/modeling.html"> Modeling</a></li>
<li class="toctree-l2"><a class="reference internal" href="../mmds/example.html"> Example</a></li>
</ul>
</li>
</ul>
</nav>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../developer-manual/index.html" rel="nofollow">开发手册</a>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../admin-manual/index.html" rel="nofollow">管理手册</a>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../integrations/index.html" rel="nofollow">集成手册</a>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../examples-manual/index.html" rel="nofollow">最佳实践</a>
      </li>
    </ul>
  </div></div>
    </div><div class="col-md-8 content" id="main-content">
    
  <div class="section" id="tutorials-fitbit">
<span id="id1"></span><h1>示例: 分析和物联网设备数据脱敏<a class="headerlink" href="#tutorials-fitbit" title="Permalink to this headline">🔗</a></h1>
<div class="section" id="id2">
<h2>简介<a class="headerlink" href="#id2" title="Permalink to this headline">🔗</a></h2>
<p>本教程演示了如何使用 CDAP 的数据预处理和数据流管道来清理, 准备, 脱敏, 和存储以 JSON 格式发送的 IoT 设备数据.</p>
</div>
<div class="section" id="id3">
<h2>场景<a class="headerlink" href="#id3" title="Permalink to this headline">🔗</a></h2>
<p>您以 JSON 格式接收 FitBit 设备数据. 您想要与外部承包商共享这些数据,
但是您需要在共享之前对数据进行脱敏以删除个人识别信息(即, 数据脱敏).</p>
<ul class="simple">
<li>您将解析 FitBit JSON 数据, 提取 UNIX 时间戳, 对设备 ID 进行脱敏, 对结果进行哈希处理, 然后存储到 CDAP 表中</li>
<li>您需要将脱敏结果写入承包商要使用的数据库中</li>
</ul>
</div>
<div class="section" id="id4">
<h2>数据<a class="headerlink" href="#id4" title="Permalink to this headline">🔗</a></h2>
<p>单击下面的按钮, 下载一个 <cite>.json</cite> 文件, 其中包含完成本教程所需的数据.</p>
<p><a class="reference download internal" download="" href="../_downloads/cf60fcb120d337ca7b09b3084bd3861e/FitBit_Device.json"><code class="xref download docutils literal notranslate"><span class="pre">FitBit_Device.json</span></code></a></p>
</div>
<div class="section" id="id5">
<h2>视频教程<a class="headerlink" href="#id5" title="Permalink to this headline">🔗</a></h2>
<p>(暂未提供)</p>
<!-- youtube:: V8e6yr8hpZA -->
</div>
<div class="section" id="id6">
<h2>操作步骤<a class="headerlink" href="#id6" title="Permalink to this headline">🔗</a></h2>
<div class="section" id="id7">
<h3>加载数据<a class="headerlink" href="#id7" title="Permalink to this headline">🔗</a></h3>
<p>下载上方 <cite>数据</cite> 部分中链接的数据. 打开数据预处理, 然后将 <cite>FitBit_Device.json</cite> 作为 “File” 上传.</p>
<p>将数据加载到 <cite>body</cite> 列之后, 从 <cite>body</cite> 列的下拉菜单中选择 解析 &gt; JSON (深度 1).
这将创建一个包含数组中每个 JSON 对象的行.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/parse_body.jpeg"><img alt="../_images/parse_body.jpeg" class="bordered-image" src="../_images/parse_body.jpeg" style="width: 500px;" /></a>
</div>
<p>要将 JSON 字段拆分为列, 请再次对 <cite>body</cite> 列应用 解析 &gt; JSON 指令.</p>
<p>现在，您有了四列: <cite>body_device_id</cite>, <cite>body_calories_burnt</cite>, <cite>body_duration</cite>, <cite>body_timestamp</cite>.</p>
<p>通过选择下拉菜单并选择 <cite>删除数据列</cite> 来删除 <cite>body_duration</cite> 列. 您不需要此列, 因为每一行的值都是 <cite>60</cite>.</p>
</div>
<div class="section" id="id">
<h3>对设备 ID 进行脱敏<a class="headerlink" href="#id" title="Permalink to this headline">🔗</a></h3>
<p>此数据包含 FitBit 设备的 ID. 这是个人识别信息, 并可能损害提供数据的用户.
您希望脱敏此数据, 以确保重要的个人信息不会被恶意行为者窃取.</p>
<p>为此, 您可以从 <cite>body_device_id</cite> 列的下拉菜单中应用 <cite>数据脱敏</cite> 指令.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/mask.jpeg"><img alt="../_images/mask.jpeg" class="bordered-image" src="../_images/mask.jpeg" style="width: 500px;" /></a>
</div>
<p>选择 <cite>只显示最后 4 个字符</cite>. 这将用掩码字符隐藏除最后四个字符之外的所有字符.
<cite>打乱顺序</cite> 将随机重新排列该列中的数字/字符, 以使原始信息无法重建.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/masked_data.jpeg"><img alt="../_images/masked_data.jpeg" class="bordered-image" src="../_images/masked_data.jpeg" style="width: 500px;" /></a>
</div>
</div>
<div class="section" id="unix">
<h3>从 UNIX 时间戳获取时间<a class="headerlink" href="#unix" title="Permalink to this headline">🔗</a></h3>
<p><cite>body_timestamp</cite> 采用 UNIX 时间戳格式, 它表示自 1970 年 1 月 1 日 (UNIX 纪元) 以来经过的秒数.</p>
<p>要查找数据时间, 可以通过取 86400 的模, 这是每天的秒数. 确切的操作是:</p>
<p><code class="docutils literal notranslate"><span class="pre">body_timestamp</span> <span class="pre">%</span> <span class="pre">86400</span></code></p>
<p>这将产生从午夜开始的时间.</p>
<p>首先, 您会注意到 <cite>body_timestamp</cite> 的类型是 String. 您不能对 String 执行数学运算!
要解决此问题, 您需要将 <cite>body_timestamp</cite> 转换为适当的数据类型, 例如 float.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/string.jpeg"><img alt="../_images/string.jpeg" class="bordered-image" src="../_images/string.jpeg" style="width: 250px;" /></a>
</div>
<p>为此, 请在屏幕底部的提示中键入以下指令:</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/float.jpeg"><img alt="../_images/float.jpeg" class="bordered-image" src="../_images/float.jpeg" style="width: 800px;" /></a>
</div>
<p><cite>set-type</cite> 指令用于不同数据类型之间的转换. 在这里, 您已将 String 转换为 Float,
用于表示浮点十进制数字.</p>
<p>现在您已将时间戳设置为正确的数据类型, 您需要计算模数. 您可以使用 <cite>body_timestamp</cite> 下拉菜单应用模运算.
选择 <cite>计算</cite> &gt; <cite>模运算</cite>, 然后指定 86400.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/mod.jpeg"><img alt="../_images/mod.jpeg" class="bordered-image" src="../_images/mod.jpeg" style="width: 500px;" /></a>
</div>
<p>您将看到以下数据出现. 这些值表示午夜之后的秒数. 例如, 第一行包含值 3070.0,
这意味着在午夜之后 3070.0 秒时读取了 FitBit 数据.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/seconds_after.jpeg"><img alt="../_images/seconds_after.jpeg" class="bordered-image" src="../_images/seconds_after.jpeg" style="width: 300px;" /></a>
</div>
</div>
<div class="section" id="id8">
<h3>用于处理无效数据的检查选项<a class="headerlink" href="#id8" title="Permalink to this headline">🔗</a></h3>
<p>扫描数据, 您会看到并非所有数据都是有效的. 第 13 行的 <cite>body_calories_burnt</cite> 列值为 -7.
虽然绝对有可能燃烧掉负卡路里 (例如通过吃甜甜圈), 但很有可能是 FitBit 读取的错误信息.</p>
<p>在处理此错误数据之前, 需要将 <cite>body_calories_burnt</cite> 的数据类型从 String 更改为 Float.
您可以通过应用指令 <cite>set-type body_calories_burnt Float</cite> 来完成此操作.</p>
<p>现在, 您可以处理无效数据.</p>
<p>首先, 您将查看 <cite>send-to-error</cite> 指令. 当在数据流管道中处理记录时,
<cite>send-to-error</cite> 将记录标记为错误, 并导致该记录被写入错误节点, 而不是数据流中的下一个阶段.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/sendtoerror.jpeg"><img alt="../_images/sendtoerror.jpeg" class="bordered-image" src="../_images/sendtoerror.jpeg" style="width: 500px;" /></a>
</div>
<p>应用此指令时, 将看到以下内容:</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/nodata.jpeg"><img alt="../_images/nodata.jpeg" class="bordered-image" src="../_images/nodata.jpeg" style="width: 700px;" /></a>
</div>
<p>怎么回事?</p>
<p>在数据预处理中, 您摄取了一条 “记录,” 即 FitBit.json 文件. 尽管已将单个输入记录拆分为多个输出记录,
但它仍然是单个输入记录. 因此，当您应用 <cite>send-to-error</cite> 时，会将整个记录标记为错误.
因此，没有数据显示.</p>
<p>如果您希望仅在所有内容均有效时才接受整批数据, <cite>send-to-error</cite> 非常有用.</p>
<p>在这种情况下, 可以有一个错误的读数, 因为它不会影响分析的总体结果.</p>
<p>要删除 <cite>send-to-error</cite> 指令, 请导航到右侧栏，然后单击 <cite>send-to-error</cite> 旁边的 “x”, 它是指令编号 12.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/remove.jpeg"><img alt="../_images/remove.jpeg" class="bordered-image" src="../_images/remove.jpeg" style="width: 300px;" /></a>
</div>
<p>您的数据将重新出现.</p>
<p>您可以应用过滤器，而不是将记录发送到错误. 尽管这不会在数据流管道中生成错误记录, 但会在处理后删除无效数据.</p>
<p>从 <cite>body_calories_burnt</cite> 列选择 <cite>过滤器</cite>, 然后 <cite>删除数据行</cite> &gt; 使用 <cite>自定义条件</cite>. 将条件指定为小于零.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/filter_neg.jpeg"><img alt="../_images/filter_neg.jpeg" class="bordered-image" src="../_images/filter_neg.jpeg" style="width: 500px;" /></a>
</div>
<p>您将看到错误的行现在已被删除.</p>
</div>
<div class="section" id="id11">
<h3>编码数据以进行传输<a class="headerlink" href="#id11" title="Permalink to this headline">🔗</a></h3>
<p>使用 Base64 进行编码时，传输中的纯文本更能抵抗传输错误.
因此, 您想对所有列使用 Base64 编码.</p>
<p>首先, 通过应用指令 <cite>set-type body_calories_burnt String</cite> 和 <cite>set-type body_timestamp String</cite> 将 <cite>body_calories_burnt</cite> 和 <cite>body_timestamp</cite> 转换回 string.</p>
<p>现在, 从任何列的下拉菜单中, 选择 <cite>编码</cite> &gt; <cite>Base64</cite>.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/encode.jpeg"><img alt="../_images/encode.jpeg" class="bordered-image" src="../_images/encode.jpeg" style="width: 300px;" /></a>
</div>
<p>对所有列重复此操作. 现在, 数据都以 Base64 格式编码了. 删除原始列.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/encoded_data.jpeg"><img alt="../_images/encoded_data.jpeg" class="bordered-image" src="../_images/encoded_data.jpeg" style="width: 500px;" /></a>
</div>
</div>
<div class="section" id="id12">
<h3>将结果存储在表中<a class="headerlink" href="#id12" title="Permalink to this headline">🔗</a></h3>
<p>最后, 您希望将数据写入 CDAP 表数据集, 该数据集可以导出并发送给将研究脱敏数据的分析人员.</p>
<p>CDAP 表数据集要求每一行都有唯一的标识符. 因为您已屏蔽了唯一的设备 ID,
所以最后 4 位的 Base64 编码可能相同. 为确保没有数据行被覆盖, 您将使用 <cite>generate-uuid</cite> 指令.</p>
<p>UUID 是唯一标识符. <cite>generate-uuid</cite> 为每一行生成一个 UUID. 在屏幕底部的提示中键入 <cite>generate-uuid uuid</cite>,
这将创建一个名为 <cite>uuid</cite> 的新列.</p>
<p>现在, 您可以将数据提取到 CDAP 表数据集中.</p>
<p>点击右上角的 <cite>融合数据</cite>.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/ingest.jpeg"><img alt="../_images/ingest.jpeg" class="bordered-image" src="../_images/ingest.jpeg" style="width: 250px;" /></a>
</div>
<p>选择 <cite>Table</cite> 并将该表命名为 “FitBitTable.” 应将 <cite>Row Key</cite> 指定为 <cite>uuid</cite>, 您知道该值是唯一的.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/ingest_config.jpeg"><img alt="../_images/ingest_config.jpeg" class="bordered-image" src="../_images/ingest_config.jpeg" style="width: 500px;" /></a>
</div>
<p>单击 <cite>融合数据</cite>. 任务完成后, 点击 <cite>探索数据</cite>.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/explore.jpeg"><img alt="../_images/explore.jpeg" class="bordered-image" src="../_images/explore.jpeg" style="width: 500px;" /></a>
</div>
<p>执行您在屏幕上看到的查询. 您将看到您刚刚准备的数据已写入表中!</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/result.jpeg"><img alt="../_images/result.jpeg" class="bordered-image" src="../_images/result.jpeg" style="width: 700px;" /></a>
</div>
</div>
</div>
</div>

</div>
    <div class="col-md-2">
      <div id="right-sidebar" class="bs-sidenav scrollable-y" role="complementary">
        <div id="localtoc-scrollspy">
        </div>
      </div>
    </div></div>
</div>
<!-- block main content end -->
<!-- block footer -->
<footer class="footer">
      <div class="container">
        <div class="row">
          <div class="col-md-2 footer-left"><a title="Example: Building a Stock Selection Pipeline" href="stocks.html">Previous</a></div>
          <div class="col-md-8 footer-center"><a class="footer-tab-link" href="../table-of-contents/../../reference-manual/licenses/index.html">Copyright</a> &copy; 2014-2020 Cask Data, Inc.&bull; <a class="footer-tab-link" href="//docs.cask.co/cdap/6.1.1/cdap-docs-6.1.1-web.zip" rel="nofollow">Download</a> an archive or
<a class="footer-tab-link" href="//docs.cask.co/cdap">switch the version</a> of the documentation
          </div>
          <div class="col-md-2 footer-right"><a title="数据预处理文档" href="../data-preparation/index.html">Next</a></div>
        </div>
      </div>
    </footer>
<!-- block footer end -->
<script type="text/javascript" src="../_static/bootstrap-3.3.6/js/bootstrap.min.js"></script><script type="text/javascript" src="../_static/js/bootstrap-sphinx.js"></script><script type="text/javascript" src="../_static/js/abixTreeList-2.js"></script><script type="text/javascript" src="../_static/js/cdap-dynamicscrollspy-4.js"></script><script type="text/javascript" src="../_static/js/cdap-version-menu.js"></script><script type="text/javascript" src="../_static/js/copy-to-clipboard.js"></script><script type="text/javascript" src="../_static/js/jquery.mousewheel.min.js"></script><script type="text/javascript" src="../_static/js/jquery.mCustomScrollbar.js"></script><script type="text/javascript" src="../_static/js/js.cookie.js"></script><script type="text/javascript" src="../_static/js/tabbed-parsed-literal-0.2.js"></script><script type="text/javascript" src="../_static/js/cdap-onload-javascript.js"></script><script type="text/javascript" src="../_static/js/cdap-version-menu.js"></script>
    <script src="https://cdap.gitee.io/docs/cdap/json-versions.js"></script>
  </body>
</html>